#!/usr/bin/perl 
use strict;
use warnings;
use File::Spec::Functions;

# Walk the directory tree rooted at "Top" inside $params{dmoz_dir} and
# concatenate every per-directory list file into a single output file,
# optionally keeping only every Nth line.
#
# Usage: SCRIPT OUTPUT_FILE [SAMPLE_RATIO [LIST_FILE]]
#   OUTPUT_FILE   file to (over)write.  It is opened after the chdir into
#                 dmoz_dir, so a relative path is resolved against dmoz_dir.
#   SAMPLE_RATIO  optional positive integer N; line 1, N+1, 2N+1, ... of the
#                 overall stream of list-file lines is kept.  When omitted,
#                 every line is kept.
#   LIST_FILE     base name of the list file looked up in each directory.
#                 NOTE(review): the original never set $params{list_file};
#                 the real file name is not visible here, so it must be given
#                 on the command line or filled in below -- confirm.
our %params = (
    dmoz_dir    => '/home/xuqy/Desktop/dmoz',
    output_file => undef,
    list_file   => undef,
);

my ($output_file, $sample_ratio, $list_file) = @ARGV;

# Command-line values take precedence over whatever is preset in %params.
$params{output_file} = $output_file if defined $output_file;
$params{list_file}   = $list_file   if defined $list_file;

die "usage: $0 OUTPUT_FILE [SAMPLE_RATIO [LIST_FILE]]\n"
    if not defined $params{output_file};
die "no list file name: pass LIST_FILE as third argument or set \$params{list_file}\n"
    if not defined $params{list_file};

# Reject 0 and non-numeric ratios up front; "% 0" would die mid-run with
# "Illegal modulus zero" after the output file was already truncated.
if (defined $sample_ratio and $sample_ratio !~ /\A[1-9][0-9]*\z/) {
    die "SAMPLE_RATIO must be a positive integer, got '$sample_ratio'\n";
}

chdir $params{dmoz_dir} or die "cannot chdir to $params{dmoz_dir}: $!";

open my $out_fh, '>', $params{output_file}
    or die "cannot open $params{output_file} for writing: $!";

my @dirs  = ('Top');
my $seen  = 0;    # lines read from all list files so far (drives sampling)
my $count = 0;    # lines actually written to the output file
while (@dirs) {
    my $dir = shift @dirs;

    my $list_path = catfile($dir, $params{list_file});
    if (-e $list_path) {
        open my $list_fh, '<', $list_path
            or die "cannot open $list_path: $!";
        while (my $line = <$list_fh>) {
            if (not defined $sample_ratio or $seen % $sample_ratio == 0) {
                print {$out_fh} $line
                    or die "cannot write to $params{output_file}: $!";
                $count++;
            }
            # Advance for every line, kept or not; counting only kept lines
            # would stall the sampler after the very first line.
            $seen++;
        }
        close $list_fh;
    }

    opendir my $dir_fh, $dir or die "cannot open $dir: $!";
    my @sub_dirs;
    # Explicit defined(): an entry named "0" must not end the loop.
    while (defined(my $entry = readdir $dir_fh)) {
        next if $entry eq '.' or $entry eq '..';
        my $sub_dir = catdir($dir, $entry);
        # Descend into real directories only; symlinks are skipped.
        next if not -d $sub_dir or -l $sub_dir;
        push @sub_dirs, $sub_dir;
    }
    closedir $dir_fh;

    # Put children at the front of the queue: depth-first traversal.
    unshift @dirs, @sub_dirs;
}

# Buffered write errors only surface at close time.
close $out_fh or die "cannot close $params{output_file}: $!";
print "Done! $count urls obtained!\n";
